"""
Phase 10: Intangible Capital and the I/Y-K/Y Puzzle
=====================================================
Tests whether intangible capital (INTAN-Invest) resolves the disconnect
between demographics predicting I/Y (+) but not K/Y (-).

Data: INTAN-Invest annual CSV (32 countries, 1995-2024)
  - Software & databases, R&D, other IPP (national-accounts intangibles)
  - Industrial design, financial products, market research, branding,
    organizational capital, employer training (non-NA intangibles)

Tests:
  1. Demographics -> intangible investment (parallel to phase 2 baselines)
  2. Tangible vs intangible decomposition (sign flip test)
  3. Intangible capital as mediator/suppressor of I/Y -> CA
  4. Intangible subcategory decomposition
  5. Robustness (NA-only measure, pre/post-GFC, excl financial centers)
"""

import sys, time
from pathlib import Path
import pandas as pd
import numpy as np
import requests
import warnings
warnings.filterwarnings('ignore')

PROJECT_DIR = Path(__file__).resolve().parent.parent
ROOT_DIR = PROJECT_DIR.parent
sys.path.insert(0, str(ROOT_DIR / "multilateral" / "src"))
from model import PanelGLS

DATA = PROJECT_DIR / "data" / "processed"
OUT_TABLES = PROJECT_DIR / "output" / "tables"
OUT_TABLES.mkdir(parents=True, exist_ok=True)

CONTROLS = ['fiscal_bal_gdp', 'nfa_gdp_lag', 'rgdp_growth', 'log_rel_opw', 'kaopen']

FINANCIAL_CENTERS = ['IRL', 'LUX', 'NLD', 'CYP', 'MLT']

# ISO3 mapping for INTAN-Invest country codes
INTAN_ISO3_MAP = {
    'AT': 'AUT', 'BE': 'BEL', 'BG': 'BGR', 'HR': 'HRV', 'CY': 'CYP',
    'CZ': 'CZE', 'DK': 'DNK', 'EE': 'EST', 'FI': 'FIN', 'FR': 'FRA',
    'DE': 'DEU', 'GR': 'GRC', 'HU': 'HUN', 'IE': 'IRL', 'IT': 'ITA',
    'LV': 'LVA', 'LT': 'LTU', 'LU': 'LUX', 'MT': 'MLT', 'NL': 'NLD',
    'PL': 'POL', 'PT': 'PRT', 'RO': 'ROU', 'SK': 'SVK', 'SI': 'SVN',
    'ES': 'ESP', 'SE': 'SWE', 'BR': 'BRA', 'IN': 'IND', 'JP': 'JPN',
    'UK': 'GBR', 'US': 'USA', 'EL': 'GRC',
    # Also handle ISO3 directly
    'AUT': 'AUT', 'BEL': 'BEL', 'BGR': 'BGR', 'HRV': 'HRV', 'CYP': 'CYP',
    'CZE': 'CZE', 'DNK': 'DNK', 'EST': 'EST', 'FIN': 'FIN', 'FRA': 'FRA',
    'DEU': 'DEU', 'GRC': 'GRC', 'HUN': 'HUN', 'IRL': 'IRL', 'ITA': 'ITA',
    'LVA': 'LVA', 'LTU': 'LTU', 'LUX': 'LUX', 'MLT': 'MLT', 'NLD': 'NLD',
    'POL': 'POL', 'PRT': 'PRT', 'ROU': 'ROU', 'SVK': 'SVK', 'SVN': 'SVN',
    'ESP': 'ESP', 'SWE': 'SWE', 'BRA': 'BRA', 'IND': 'IND', 'JPN': 'JPN',
    'GBR': 'GBR', 'USA': 'USA',
}


# ── Helpers (mirroring phase2/phase3 exactly) ────────────────────────────

def stars(p):
    if p < 0.01: return '***'
    if p < 0.05: return '**'
    if p < 0.1: return '*'
    return ''


def fmt(val, se, p):
    s = stars(p)
    return f"{val:.4f}{s}", f"({se:.4f})"


def run_panel_gls(df, y_var, x_vars, label, feature_names=None):
    """Run PanelGLS and return results dict."""
    cols = [y_var] + x_vars + ['iso3', 'year']
    sub = df[cols].dropna()
    if len(sub) < 50:
        print(f"  {label}: insufficient obs ({len(sub)}), skipping")
        return None

    gls = PanelGLS()
    y = sub[y_var].values
    X = sub[x_vars].values
    gls.fit(y, X, sub['iso3'].values, sub['year'].values)

    names = feature_names if feature_names else x_vars
    result = {
        'model': label,
        'n_obs': gls.n_obs,
        'n_countries': gls.n_countries,
        'r_squared': gls.r_squared,
        'rho': gls.rho,
    }
    for i, name in enumerate(names):
        result[f'{name}_coef'] = gls.beta[i]
        result[f'{name}_se'] = gls.se[i]
        result[f'{name}_p'] = gls.pvalues[i]

    print(f"\n  {label} (N={gls.n_obs}, R²={gls.r_squared:.4f})")
    for i, name in enumerate(names):
        sig = stars(gls.pvalues[i])
        print(f"    {name:30s} {gls.beta[i]:8.4f} ({gls.se[i]:.4f}) {sig}")

    return result


def write_table(results, filename, title):
    """Write regression results as markdown table."""
    if not results:
        return

    lines = [f"# {title}\n"]

    all_vars = []
    for r in results:
        for k in r:
            if k.endswith('_coef'):
                vname = k.replace('_coef', '')
                if vname not in all_vars:
                    all_vars.append(vname)

    model_labels = [r['model'] for r in results]
    header = "| Variable | " + " | ".join(model_labels) + " |"
    sep = "|:---|" + "|".join(["---:" for _ in results]) + "|"
    lines.append(header)
    lines.append(sep)

    for var in all_vars:
        coef_row = f"| {var} |"
        se_row = "| |"
        for r in results:
            if f'{var}_coef' in r:
                c, s = fmt(r[f'{var}_coef'], r[f'{var}_se'], r[f'{var}_p'])
                coef_row += f" {c} |"
                se_row += f" {s} |"
            else:
                coef_row += " |"
                se_row += " |"
        lines.append(coef_row)
        lines.append(se_row)

    lines.append("|:---|" + "|".join(["---:" for _ in results]) + "|")
    n_row = "| N |"
    r2_row = "| R² |"
    nc_row = "| Countries |"
    for r in results:
        n_row += f" {r['n_obs']} |"
        r2_row += f" {r['r_squared']:.4f} |"
        nc_row += f" {r['n_countries']} |"
    lines.append(n_row)
    lines.append(r2_row)
    lines.append(nc_row)

    lines.append("\n*Panel GLS with AR(1) correction. "
                 "Standard errors in parentheses.*")
    lines.append("*\\*p<0.1, \\*\\*p<0.05, \\*\\*\\*p<0.01*")

    path = OUT_TABLES / filename
    path.write_text('\n'.join(lines))
    print(f"\n  Saved: {path}")


def compute_attenuation(baseline, mediated, var='Z_1'):
    """Compute attenuation percentage for a given variable."""
    base_key = f'{var}_coef'
    if base_key not in baseline or base_key not in mediated:
        return None
    b_base = baseline[base_key]
    b_med = mediated[base_key]
    if abs(b_base) < 1e-10:
        return None
    return (b_base - b_med) / b_base * 100


# ── Data download and construction ──────────────────────────────────────

def download_intan_invest(cache_path):
    """Download INTAN-Invest annual CSV or use cache."""
    if cache_path.exists():
        print(f"  Using cached {cache_path.name}")
        return pd.read_csv(cache_path)

    # Try downloading from INTAN-Invest
    urls = [
        "https://global-intaninvest.luiss.it/wp-content/themes/wp-bootstrap-starter-child/download-2025/Annual/annual_extrapolated_estimates.csv",
        "https://global-intaninvest.luiss.it/wp-content/uploads/annual_extrapolated_estimates.csv",
        "https://global-intaninvest.luiss.it/data/annual_extrapolated_estimates.csv",
    ]

    for url in urls:
        try:
            print(f"  Trying {url}...")
            resp = requests.get(url, timeout=60)
            if resp.status_code == 200:
                cache_path.write_bytes(resp.content)
                print(f"  Downloaded INTAN-Invest data ({len(resp.content):,} bytes)")
                return pd.read_csv(cache_path)
        except Exception as e:
            print(f"  Failed: {e}")

    print("  WARNING: Could not download INTAN-Invest data.")
    print("  Please manually download from https://global-intaninvest.luiss.it/data-and-methods/")
    print(f"  and save to {cache_path}")
    return None


def construct_intangible_vars(intan_df):
    """
    Construct intangible investment variables from INTAN-Invest data.
    INTAN-Invest 2025 columns (current prices, millions national currency):
      I_GFCF       = Total GFCF
      I_NatAcc     = National accounts intangibles (software, R&D, artistic)
      I_TangNRes   = Tangible non-residential
      I_Cult       = Cultural/artistic originals (subset of NA intangibles)
      I_NonNatAcc  = Non-national-accounts intangibles
      I_OrgCap     = Organizational capital
      I_Design     = Design
      I_Brand      = Branding/marketing
      I_NFP        = New financial products
      I_Intan      = Total intangible (NA + non-NA)
      GDP          = GDP in millions national currency
    Returns DataFrame with iso3, year, and % GDP variables.
    """
    df = intan_df.copy()

    # Map country codes
    country_col = 'geo_code' if 'geo_code' in df.columns else df.columns[1]
    df['iso3'] = df[country_col].astype(str).str.strip().map(INTAN_ISO3_MAP)
    df['year'] = pd.to_numeric(df['year'], errors='coerce')
    df = df.dropna(subset=['iso3', 'year'])
    df['year'] = df['year'].astype(int)

    print(f"  INTAN-Invest: {df['iso3'].nunique()} countries, {df['year'].min()}-{df['year'].max()}")

    # Convert to numeric, handle 'NA' strings
    for col in ['I_Intan', 'I_NatAcc', 'I_NonNatAcc', 'I_OrgCap', 'I_Design',
                'I_Brand', 'I_NFP', 'I_Cult', 'I_GFCF', 'I_TangNRes', 'GDP']:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors='coerce')

    # Normalize to % GDP
    out = df[['iso3', 'year']].copy()

    if 'GDP' in df.columns and 'I_Intan' in df.columns:
        mask = (df['GDP'] > 0) & df['I_Intan'].notna()
        out.loc[mask, 'intangible_inv_raw'] = df.loc[mask, 'I_Intan']
        out.loc[mask, 'intangible_inv_gdp'] = df.loc[mask, 'I_Intan'] / df.loc[mask, 'GDP'] * 100

    if 'GDP' in df.columns and 'I_NatAcc' in df.columns:
        mask = (df['GDP'] > 0) & df['I_NatAcc'].notna()
        out.loc[mask, 'intangible_na_raw'] = df.loc[mask, 'I_NatAcc']
        out.loc[mask, 'intangible_na_gdp'] = df.loc[mask, 'I_NatAcc'] / df.loc[mask, 'GDP'] * 100

    if 'GDP' in df.columns and 'I_NonNatAcc' in df.columns:
        mask = (df['GDP'] > 0) & df['I_NonNatAcc'].notna()
        out.loc[mask, 'intangible_non_na_raw'] = df.loc[mask, 'I_NonNatAcc']
        out.loc[mask, 'intangible_non_na_gdp'] = df.loc[mask, 'I_NonNatAcc'] / df.loc[mask, 'GDP'] * 100

    # Subcategories (% GDP)
    subcats = {
        'org_capital': 'I_OrgCap',
        'design': 'I_Design',
        'branding': 'I_Brand',
        'new_fin_products': 'I_NFP',
        'cultural': 'I_Cult',
    }
    for name, col in subcats.items():
        if col in df.columns and 'GDP' in df.columns:
            vals = pd.to_numeric(df[col], errors='coerce')
            mask = (df['GDP'] > 0) & vals.notna()
            out.loc[mask, name] = vals[mask]
            out.loc[mask, f'{name}_gdp'] = vals[mask] / df.loc[mask, 'GDP'] * 100

    # Tangible non-residential as complement
    if 'I_TangNRes' in df.columns and 'GDP' in df.columns:
        mask = (df['GDP'] > 0) & df['I_TangNRes'].notna()
        out.loc[mask, 'tangible_nres_gdp'] = df.loc[mask, 'I_TangNRes'] / df.loc[mask, 'GDP'] * 100

    # R&D: I_NatAcc minus I_Cult gives software+R&D; use I_NatAcc as "R&D broad"
    # (INTAN-Invest doesn't separate software and R&D in current release)

    constructed = [c for c in out.columns if c not in ['iso3', 'year']]
    print(f"  Constructed: {constructed}")
    print(f"  intangible_inv_gdp: {out['intangible_inv_gdp'].notna().sum()} obs" if 'intangible_inv_gdp' in out.columns else "")

    return out


def merge_intangible_data(panel_df):
    """Download INTAN-Invest and merge into automation panel."""
    cache_path = DATA / 'intan_invest_annual.csv'
    intan_raw = download_intan_invest(cache_path)

    if intan_raw is None:
        print("  Generating synthetic intangible data from WDI proxies...")
        return construct_from_wdi(panel_df)

    intan = construct_intangible_vars(intan_raw)

    # Variables are already normalized to %GDP by construct_intangible_vars
    merge_cols = ['iso3', 'year'] + [c for c in intan.columns
                                      if c.endswith('_gdp') and c not in ['iso3', 'year']]
    merge_cols = list(set(merge_cols))

    panel_df = panel_df.merge(intan[merge_cols], on=['iso3', 'year'], how='left')

    # Construct tangible = gross_investment_gdp - intangible_na_gdp
    if 'intangible_na_gdp' in panel_df.columns and 'gross_investment_gdp' in panel_df.columns:
        panel_df['tangible_inv_gdp'] = panel_df['gross_investment_gdp'] - panel_df['intangible_na_gdp']

    n_intan = panel_df['intangible_inv_gdp'].notna().sum() if 'intangible_inv_gdp' in panel_df.columns else 0
    n_c = panel_df.loc[panel_df['intangible_inv_gdp'].notna(), 'iso3'].nunique() if n_intan > 0 else 0
    print(f"  Intangible data merged: {n_intan:,} obs, {n_c} countries")

    return panel_df


def construct_from_wdi(panel_df):
    """
    Fallback: construct intangible investment proxy from WDI R&D spending
    if INTAN-Invest download fails.
    """
    cache_path = DATA / 'wdi_rd_expenditure.csv'

    if cache_path.exists():
        rd = pd.read_csv(cache_path)
    else:
        print("  Downloading WDI R&D expenditure (GB.XPD.RSDV.GD.ZS)...")
        all_rows = []
        page = 1
        while True:
            url = (f"https://api.worldbank.org/v2/country/all/indicator/GB.XPD.RSDV.GD.ZS"
                   f"?format=json&per_page=1000&page={page}&date=1995:2023")
            try:
                resp = requests.get(url, timeout=60)
                resp.raise_for_status()
                data = resp.json()
                if len(data) < 2 or not data[1]:
                    break
                for rec in data[1]:
                    if rec['value'] is not None:
                        all_rows.append({
                            'iso3': rec['countryiso3code'],
                            'year': int(rec['date']),
                            'value': float(rec['value']),
                        })
                if page >= data[0].get('pages', 1):
                    break
                page += 1
                time.sleep(0.3)
            except Exception as e:
                print(f"  WDI download failed: {e}")
                break

        rd = pd.DataFrame(all_rows)
        if len(rd) > 0:
            rd.to_csv(cache_path, index=False)

    if len(rd) > 0:
        rd = rd.rename(columns={'value': 'rd_gdp'})
        panel_df = panel_df.merge(rd[['iso3', 'year', 'rd_gdp']], on=['iso3', 'year'], how='left')
        # Use R&D as proxy for intangible
        panel_df['intangible_inv_gdp'] = panel_df['rd_gdp']
        panel_df['intangible_na_gdp'] = panel_df['rd_gdp']
        panel_df['rd_gdp_sub'] = panel_df['rd_gdp']  # subcategory
        if 'capital_intensity' in panel_df.columns:
            panel_df['tangible_inv_gdp'] = panel_df['capital_intensity'] - panel_df['rd_gdp'].fillna(0)
        print(f"  Using WDI R&D as intangible proxy: {panel_df['rd_gdp'].notna().sum():,} obs")

    return panel_df


# ── Test 1: Demographics → intangible investment ────────────────────────

def test_baselines(df):
    """Table 1: Z → intangible investment (parallel to phase 2)."""
    print("\n" + "=" * 70)
    print("TEST 1: Demographics → Intangible Investment")
    print("=" * 70)

    results = []

    # Check which DVs are available
    dvs = {
        'intangible_inv_gdp': 'Total Intangible/GDP',
        'capital_intensity': 'Investment/GDP (I/Y)',
    }

    # M1: Z → intangible_inv_gdp (Z only)
    r = run_panel_gls(df, 'intangible_inv_gdp',
                      ['Z_1', 'Z_2', 'Z_3'],
                      'M1: Z→Intang')
    if r: results.append(r)

    # M2: Z → intangible_inv_gdp (+ controls)
    r = run_panel_gls(df, 'intangible_inv_gdp',
                      ['Z_1', 'Z_2', 'Z_3'] + CONTROLS,
                      'M2: Z→Intang+ctrl')
    if r: results.append(r)

    # M3: Z → I/Y (for magnitude comparison, same sample)
    intan_countries = df.loc[df['intangible_inv_gdp'].notna(), 'iso3'].unique()
    sub32 = df[df['iso3'].isin(intan_countries)].copy()
    r = run_panel_gls(sub32, 'capital_intensity',
                      ['Z_1', 'Z_2', 'Z_3'] + CONTROLS,
                      'M3: Z→I/Y (32c)')
    if r: results.append(r)

    # M4: old_dep / youth_dep → intangible
    r = run_panel_gls(df, 'intangible_inv_gdp',
                      ['old_dep', 'youth_dep'] + CONTROLS,
                      'M4: Age→Intang')
    if r: results.append(r)

    # M5: Z → intangible_na_gdp (NA intangibles only)
    if 'intangible_na_gdp' in df.columns and df['intangible_na_gdp'].notna().sum() > 50:
        r = run_panel_gls(df, 'intangible_na_gdp',
                          ['Z_1', 'Z_2', 'Z_3'] + CONTROLS,
                          'M5: Z→NA-Intang')
        if r: results.append(r)

    write_table(results, "intangible_baselines.md",
                "Demographics → Intangible Investment")

    return results


# ── Test 2: Tangible vs intangible decomposition ────────────────────────

def test_tangible_vs_intangible(df):
    """Table 2: Z → tangible vs intangible (sign flip test)."""
    print("\n" + "=" * 70)
    print("TEST 2: Tangible vs. Intangible Decomposition")
    print("=" * 70)

    results = []

    # M1: Z → total I/Y
    r = run_panel_gls(df, 'capital_intensity',
                      ['Z_1', 'Z_2', 'Z_3'] + CONTROLS,
                      'M1: Z→I/Y')
    if r: results.append(r)

    # M2: Z → intangible
    r = run_panel_gls(df, 'intangible_inv_gdp',
                      ['Z_1', 'Z_2', 'Z_3'] + CONTROLS,
                      'M2: Z→Intangible')
    if r: results.append(r)

    # M3: Z → tangible (residual)
    if 'tangible_inv_gdp' in df.columns and df['tangible_inv_gdp'].notna().sum() > 50:
        r = run_panel_gls(df, 'tangible_inv_gdp',
                          ['Z_1', 'Z_2', 'Z_3'] + CONTROLS,
                          'M3: Z→Tangible')
        if r: results.append(r)

    # M4: Z → K/Y (capital per worker, if available)
    if 'capital_per_worker' in df.columns and df['capital_per_worker'].notna().sum() > 50:
        r = run_panel_gls(df, 'capital_per_worker',
                          ['Z_1', 'Z_2', 'Z_3'] + CONTROLS,
                          'M4: Z→K/L')
        if r: results.append(r)

    write_table(results, "tangible_vs_intangible.md",
                "Tangible vs. Intangible Investment Decomposition")

    return results


# ── Test 3: Intangible capital as mediator/suppressor ───────────────────

def test_mediation(df):
    """Table 3: Intangible investment as mediator of I/Y → CA suppression."""
    print("\n" + "=" * 70)
    print("TEST 3: Intangible Capital as Mediator")
    print("=" * 70)

    # Restrict to 32-country INTAN-Invest sample
    intan_countries = df.loc[df['intangible_inv_gdp'].notna(), 'iso3'].unique()
    sub = df[df['iso3'].isin(intan_countries)].copy()
    print(f"  INTAN-Invest subsample: {len(sub):,} obs, {sub['iso3'].nunique()} countries")

    results = []

    # Step 1: Z → intangible (first stage)
    r_s1 = run_panel_gls(sub, 'intangible_inv_gdp',
                         ['Z_1', 'Z_2', 'Z_3'] + CONTROLS,
                         'S1: Z→Intang')
    if r_s1: results.append(r_s1)

    # Step 2: Z → CA (baseline, 32-country subsample)
    r_s2 = run_panel_gls(sub, 'ca_gdp',
                         ['Z_1', 'Z_2', 'Z_3'] + CONTROLS,
                         'S2: Z→CA')
    if r_s2: results.append(r_s2)

    # Step 3: Z + intangible → CA (mediation)
    r_s3 = run_panel_gls(sub, 'ca_gdp',
                         ['Z_1', 'Z_2', 'Z_3', 'intangible_inv_gdp'] + CONTROLS,
                         'S3: Z+Intang→CA')
    if r_s3: results.append(r_s3)

    # Step 4: intangible only → CA
    r_s4 = run_panel_gls(sub, 'ca_gdp',
                         ['intangible_inv_gdp'] + CONTROLS,
                         'S4: Intang→CA')
    if r_s4: results.append(r_s4)

    # Step 5: Compare to I/Y suppression
    r_s5 = run_panel_gls(sub, 'ca_gdp',
                         ['Z_1', 'Z_2', 'Z_3', 'capital_intensity'] + CONTROLS,
                         'S5: Z+I/Y→CA')
    if r_s5: results.append(r_s5)

    write_table(results, "intangible_ca_mediation.md",
                "Intangible Capital Mediation: Z → Intangible → CA")

    # Attenuation report
    if r_s2 and r_s3:
        print("\n--- Attenuation Report (Intangible) ---")
        for zvar in ['Z_1', 'Z_2', 'Z_3']:
            atten = compute_attenuation(r_s2, r_s3, zvar)
            if atten is not None:
                b_base = r_s2[f'{zvar}_coef']
                b_med = r_s3[f'{zvar}_coef']
                print(f"  {zvar}: {b_base:.4f} -> {b_med:.4f} (attenuation: {atten:.1f}%)")

    if r_s2 and r_s5:
        print("\n--- Attenuation Report (I/Y for comparison) ---")
        for zvar in ['Z_1', 'Z_2', 'Z_3']:
            atten = compute_attenuation(r_s2, r_s5, zvar)
            if atten is not None:
                b_base = r_s2[f'{zvar}_coef']
                b_med = r_s5[f'{zvar}_coef']
                print(f"  {zvar}: {b_base:.4f} -> {b_med:.4f} (attenuation: {atten:.1f}%)")

    return results, r_s2, r_s3, r_s5


# ── Test 4: Intangible subcategory decomposition ────────────────────────

def test_subcategories(df):
    """Table 4: Z → individual intangible subcategories."""
    print("\n" + "=" * 70)
    print("TEST 4: Intangible Subcategory Decomposition")
    print("=" * 70)

    subcats = {
        'rd_gdp': 'R&D',
        'software_db_gdp': 'Software & DB',
        'org_capital_gdp': 'Org Capital',
        'training_gdp': 'Training',
        'design_gdp': 'Design',
        'branding_gdp': 'Branding',
        'rd_gdp_sub': 'R&D (WDI)',
    }

    results = []
    for var, label in subcats.items():
        if var in df.columns and df[var].notna().sum() > 50:
            r = run_panel_gls(df, var,
                              ['Z_1', 'Z_2', 'Z_3'] + CONTROLS,
                              f'Z→{label}')
            if r:
                results.append(r)

    if results:
        write_table(results, "intangible_subcategories.md",
                    "Demographics → Intangible Subcategories")
    else:
        print("  No subcategory data available")

    return results


# ── Test 5: Robustness ──────────────────────────────────────────────────

def test_robustness(df):
    """Table 5: Robustness — NA-only measure, pre/post-GFC, excl financial centers."""
    print("\n" + "=" * 70)
    print("TEST 5: Robustness")
    print("=" * 70)

    results = []

    # R1: NA-only intangibles
    if 'intangible_na_gdp' in df.columns and df['intangible_na_gdp'].notna().sum() > 50:
        r = run_panel_gls(df, 'intangible_na_gdp',
                          ['Z_1', 'Z_2', 'Z_3'] + CONTROLS,
                          'R1: NA-only')
        if r: results.append(r)

    # R2: Non-NA intangibles
    if 'intangible_non_na_gdp' in df.columns and df['intangible_non_na_gdp'].notna().sum() > 50:
        r = run_panel_gls(df, 'intangible_non_na_gdp',
                          ['Z_1', 'Z_2', 'Z_3'] + CONTROLS,
                          'R2: Non-NA')
        if r: results.append(r)

    # R3: Pre-GFC (1995-2007)
    pre_gfc = df[df['year'] <= 2007].copy()
    r = run_panel_gls(pre_gfc, 'intangible_inv_gdp',
                      ['Z_1', 'Z_2', 'Z_3'] + CONTROLS,
                      'R3: Pre-GFC')
    if r: results.append(r)

    # R4: Post-GFC (2010+)
    post_gfc = df[df['year'] >= 2010].copy()
    r = run_panel_gls(post_gfc, 'intangible_inv_gdp',
                      ['Z_1', 'Z_2', 'Z_3'] + CONTROLS,
                      'R4: Post-GFC')
    if r: results.append(r)

    # R5: Exclude financial centers
    excl_fc = df[~df['iso3'].isin(FINANCIAL_CENTERS)].copy()
    r = run_panel_gls(excl_fc, 'intangible_inv_gdp',
                      ['Z_1', 'Z_2', 'Z_3'] + CONTROLS,
                      'R5: Excl FC')
    if r: results.append(r)

    if results:
        write_table(results, "intangible_robustness.md",
                    "Intangible Investment — Robustness")

    return results


# ── Save results CSV ─────────────────────────────────────────────────────

def save_results_csv(all_results):
    """Save all regression coefficients to CSV."""
    rows = []
    for test_name, results_list in all_results.items():
        if not isinstance(results_list, list):
            continue
        for r in results_list:
            if r is None:
                continue
            for k in r:
                if k.endswith('_coef'):
                    var = k.replace('_coef', '')
                    rows.append({
                        'test': test_name,
                        'model': r['model'],
                        'variable': var,
                        'coefficient': r[f'{var}_coef'],
                        'std_error': r[f'{var}_se'],
                        'p_value': r[f'{var}_p'],
                        'N': r['n_obs'],
                        'N_countries': r['n_countries'],
                        'R2': r['r_squared'],
                    })

    if rows:
        out = pd.DataFrame(rows)
        out.to_csv(OUT_TABLES / 'phase10_results.csv', index=False)
        print(f"\n  Saved phase10_results.csv ({len(out)} rows)")


# ── main ─────────────────────────────────────────────────────────────────

def main():
    print("=" * 70)
    print("PHASE 10: INTANGIBLE CAPITAL AND THE I/Y-K/Y PUZZLE")
    print("=" * 70)

    # Load panel
    df = pd.read_csv(DATA / "automation_panel.csv")
    print(f"  Loaded automation panel: {len(df):,} obs, {df['iso3'].nunique()} countries")

    # Merge intangible data
    df = merge_intangible_data(df)

    # Run tests
    all_results = {}
    all_results['baselines'] = test_baselines(df)
    all_results['tangible_vs_intangible'] = test_tangible_vs_intangible(df)

    mediation_results = test_mediation(df)
    all_results['mediation'] = mediation_results[0]

    all_results['subcategories'] = test_subcategories(df)
    all_results['robustness'] = test_robustness(df)

    # Save
    save_results_csv(all_results)

    print("\n" + "=" * 70)
    print("Phase 10 complete.")
    print("=" * 70)


if __name__ == '__main__':
    main()
